Introduction
In this doc, we will be looking at the banksy clusters and try to interpret them based on their identity, and other characteristics.
Load the data
The objects are here:
RDS object with Leiden Clusters: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_20240719_092550.rds
- `/xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_embeddings/total_spatial_banksy_clusters_subset_20240719_092550.rds`
Cell type clusters annotation: /xchip/beroukhimlab/coja/Spatial_PLGG/data/Xenium/banksy/banksy_cell_cluster_annotation.tsv
Code
# load the banksy embeddings
banksy_embeddings = readRDS (ifelse (
workdir == "/xchip/beroukhimlab/" ,
paste0 (workdir,'coja/Spatial_PLGG/data/Xenium/Xenium_Objects/total_spatial_banksy_clusters_subset_20240719_092550.rds' ),
'/Users/youyun/Documents/HMS/PhD/beroukhimlab/plgg/data/total_spatial_banksy_clusters_subset_20240719_092550.rds'
))
colData (banksy_embeddings)$ sample_id = factor (colData (banksy_embeddings)$ sample_id)
# Load the cell type clusters annotation
prelim_annotation_res1 = fread (paste0 (
workdir,'coja/Spatial_PLGG/data/Xenium/banksy/banksy_cell_cluster_annotation.tsv'
))
# specify the cluster names
# this is mainly to keep track of which resolution we are using
res = 1
cell_clust_name = gsub ('.0$' ,'' ,sprintf ('clust_Harmony_BANKSY_lam0.2_k50_res%.1f' ,res))
niche_clust_name = gsub ('lam0.2' ,'lam0.8' ,cell_clust_name)
# # filter the banksy object to only include the cells with annotated cell types
# annotated_cell_clusters = sort(unique(prelim_annotation_res1$cluster))
# banksy_embeddings = banksy_embeddings[,colData(banksy_embeddings)[,cell_clust_name] %in% annotated_cell_clusters]
Setting up Neighborhood Analysis
Here we will look at the niches in the data and annotate them using the cell types of self and neighbors.
Code
k_geom = 15
locs = spatialCoords (banksy_embeddings)
knn = dbscan:: kNN (x = locs, k = k_geom)
knnDT <- data.table (
from = rep (seq_len (nrow (knn$ id)), k_geom),
from_id = rep (rownames (knn$ id), k_geom),
to = as.vector (knn$ id),
to_id = rownames (knn$ id)[as.vector (knn$ id)],
distance = as.vector (knn$ dist)
)
cell_cell_type_dt = data.table (
cell_id = rownames (colData (banksy_embeddings)),
cell_type = as.numeric (colData (banksy_embeddings)[,cell_clust_name]),
niche_type = as.numeric (colData (banksy_embeddings)[,niche_clust_name]),
sample_id = colData (banksy_embeddings)$ sample_id,
histology = colData (banksy_embeddings)$ histology
)
cell_type_cols = setdiff (colnames (prelim_annotation_res1),'cluster' )
niche_self_neighbor_dt = knnDT[,c ('from' ,'to' ) : = NULL ][
cell_cell_type_dt[,.(cell_id,cell_type)], on = c ('to_id' = 'cell_id' ),
cell_type_to : = cell_type
][cell_cell_type_dt, on = c ('from_id' = 'cell_id' )][
,.(
cell_id = from_id, cell_type, niche_type, sample_id,
histology, neighbor_cell_type = cell_type_to
)][
prelim_annotation_res1, on = c ('cell_type' = 'cluster' )
][
prelim_annotation_res1, on = c ('neighbor_cell_type' = 'cluster' ),
paste0 (cell_type_cols,' Neighbors' ) : = mget (paste0 ('i.' ,cell_type_cols))
][order (cell_id)]
niche_self_dt = unique (niche_self_neighbor_dt[
,mget (c ('cell_id' ,'cell_type' ,'niche_type' ,'sample_id' ,'histology' ,cell_type_cols))
])
Build the color palette for all niches and cell types downstream
Code
cluster_classes = sort (unique (niche_self_neighbor_dt$ niche_type))
niche_color_manual = structure (
pals:: polychrome ()[c (1 : length (cluster_classes))],
names = cluster_classes
)
cell_type_to_test = 'Coarse Cell Type'
cell_type_to_test = 'Fine Cell Type'
cell_type_to_test = 'Fine Cell Type UMAP'
cell_type_class = sort (unique (prelim_annotation_res1[[cell_type_to_test]]))
cell_type_color_manual = structure (
pals:: alphabet ()[c (1 : length (cell_type_class))],
names = cell_type_class
)
Looking at the niches in space
Code
graph_dt = melt (
as.data.table (cbind (
spatialCoords (banksy_embeddings),
colData (banksy_embeddings)[c (
'sample_id' ,'cell_id' , 'histology' ,
grep ('Harmony.*.res1$' ,clusterNames (banksy_embeddings), value = TRUE )
)]
)),id.vars = c ('sample_id' ,'cell_id' ,'histology' ,'sdimx' ,'sdimy' ),
variable.name = 'Cluster Type' ,value.name = 'Cluster ID'
)
g = ggplot (
graph_dt[` Cluster Type ` == 'clust_Harmony_BANKSY_lam0.8_k50_res1' ],
aes (x= sdimy, y= sdimx, color= ` Cluster ID ` )
) +
facet_wrap (histology ~ sample_id, nrow = 2 , scale = 'free_x' ) +
geom_point (size = 0.1 , alpha = 0.5 ) +
scale_color_manual (values = niche_color_manual) +
theme_classic () +
theme (
legend.position = "bottom" ,
axis.text.x= element_blank (),
axis.text.y= element_blank (),
axis.ticks= element_blank (),
text = element_text (size = 15 ),
strip.text = element_text (size = 10 ),
strip.background = element_blank ()
) + guides (
color = guide_legend (nrow = 3 , byrow = TRUE )
) +
coord_flip ()
g
Code
# p = ggplotly(g)
# saveWidget(p, paste0(workdir,'/youyun/plgg/code/niche/niche_identity_spatial.html'), selfcontained = TRUE)
What are the niches?
What is the distribution of niches?
Across samples and histologies
Code
ggplot (niche_self_dt) +
geom_bar (aes (x = sample_id, fill = as.factor (niche_type)), position = 'fill' ) +
facet_wrap (~ histology, scale = 'free_x' ) +
theme_minimal () + theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 15 ), legend.position = 'bottom'
) + scale_fill_manual (values = niche_color_manual) +
labs (
title = 'Distribution of Niches Across Samples' ,
x = 'Sample ID' ,
y = 'Proportion of Cells'
) + guides (
fill = guide_legend (title = 'Niche Type' , nrow = 2 )
)
ggplot (niche_self_dt) +
geom_bar (aes (x = sample_id, fill = as.factor (niche_type)), position = 'stack' ) +
facet_wrap (~ histology, scale = 'free' ) +
theme_minimal () + theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 15 ), legend.position = 'bottom'
) + scale_fill_manual (values = niche_color_manual) +
labs (
title = 'Number of Niches Across Samples' ,
x = 'Sample ID' ,
y = 'Proportion of Cells'
) + guides (
fill = guide_legend (title = 'Niche Type' , nrow = 2 )
)
Are there niches exclusive or prevalent across samples and histologies?
Some statistical testing
Code
histology_niche_enrichment = pairwise_categorical_enrichment_test (
niche_self_dt$ niche_type, niche_self_dt$ histology
)
setnames (
histology_niche_enrichment, 1 : 2 ,
c ('niche_type' , 'histology' )
)
ggplot (histology_niche_enrichment) +
geom_point (aes (
x = as.numeric (niche_type), y = histology, size = N_var1var2,
# color by odds_ratios column capped at percentile 95
fill = pmin (
quantile (log (odds_ratios), 0.95 , na.rm = TRUE ),
log (odds_ratios), na.rm = TRUE
), color = ifelse (
q_value < 0.05 & odds_ratios > 1 ,
'significant' ,'not significant'
)
), shape = 21 , stroke = 1 ) +
theme (
text = element_text (size = 15 ), legend.position = 'right'
) + scale_fill_viridis_c () +
scale_x_continuous (breaks = as.numeric (unique (histology_niche_enrichment$ niche_type))) +
scale_size_area (max_size = 10 ) +
scale_color_manual (values = c ('significant' = 'red' ,'not significant' = 'black' )) +
labs (
title = 'Enrichment of Niches Across Histologies' ,
x = 'Niche Type' ,
y = 'Histology' ,
size = 'Number of Niches' ,
fill = 'log OR' ,
color = 'Significance'
) + guides (
fill = guide_legend (title = 'log OR' ),
color = guide_legend (title = 'Significance' )
)
Is each niche exclusive to one self-cell-type?
Code
ggplot (niche_self_dt) +
geom_bar (aes (x = niche_type, fill = as.factor (get (cell_type_to_test))), position = 'fill' ) +
theme_minimal () + theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 15 ), legend.position = 'bottom'
) + scale_fill_manual (values = cell_type_color_manual) +
labs (
title = 'Distribution of Self Cell Type By Niche' ,
x = 'Niche Type' ,
y = 'Proportion of Cell Type'
) + guides (
fill = guide_legend (title = cell_type_to_test, nrow = 3 )
)
Code
ggplot (
unique (niche_self_neighbor_dt[
,mget (c ('cell_id' ,'cell_type' ,'niche_type' ,'sample_id' ,'histology' ,cell_type_cols))
])
) + geom_bar (aes (x = niche_type, fill = as.factor (get (cell_type_to_test))), position = 'stack' ) +
theme_minimal () + theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 15 ), legend.position = 'bottom'
) + scale_fill_manual (values = cell_type_color_manual) +
labs (
title = 'Distribution of Self Cell Type By Niche' ,
x = 'Niche Type' ,
y = 'Proportion of Cell Type'
) + guides (
fill = guide_legend (title = cell_type_to_test, nrow = 3 )
)
Some statistical testing
Code
self_cell_type_niche_enrichment = pairwise_categorical_enrichment_test (
niche_self_dt$ niche_type, niche_self_dt[[cell_type_to_test]]
)
setnames (
self_cell_type_niche_enrichment, 1 : 2 ,
c ('niche_type' , 'cell_type' )
)
ggplot (self_cell_type_niche_enrichment) +
geom_point (aes (
x = as.numeric (niche_type), y = cell_type, size = N_var1var2,
# color by odds_ratios column capped at percentile 95
fill = pmin (
quantile (log (odds_ratios), 0.95 , na.rm = TRUE ),
log (odds_ratios), na.rm = TRUE
), color = ifelse (
q_value < 0.05 & odds_ratios > 1 ,
'significant' ,'not significant'
)
), shape = 21 , stroke = 1 ) +
theme (
text = element_text (size = 15 ), legend.position = 'right'
) + scale_fill_viridis_c () +
scale_x_continuous (breaks = as.numeric (unique (self_cell_type_niche_enrichment$ niche_type))) +
scale_size_area (max_size = 10 ) +
scale_color_manual (values = c ('significant' = 'red' ,'not significant' = 'black' )) +
labs (
title = 'Enrichment of Self Cell Type By Niche' ,
x = 'Niche Type' ,
y = 'Cell Type' ,
size = 'Number of Niches' ,
fill = 'log OR' ,
color = 'Significance'
) + guides (
fill = guide_legend (title = 'log OR' ),
color = guide_legend (title = 'Significance' )
)
Does each niche represent a group of cells that are isotropic or anisotropic?
Code
ggplot (
niche_self_neighbor_dt[
,.(proportion = .N/ 15 ), .(
cell_id, niche_type,
neighbor_cell_type = get (paste0 (cell_type_to_test,' Neighbors' ))
)
]
) + geom_boxplot (aes (
x = neighbor_cell_type, y = proportion,
fill = neighbor_cell_type
)) +
facet_wrap (~ niche_type) +
theme_minimal () + theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 15 ), legend.position = 'bottom'
) + scale_fill_manual (values = cell_type_color_manual) +
labs (
title = 'Distribution of Neighbor Cell Type By Niche' ,
x = 'Neighbor Cell Type' ,
y = 'Proportion of Neighbor Cell Type'
) + guides (
fill = guide_legend (title = cell_type_to_test, nrow = 3 )
)
Some statistical testing
Code
neighbor_cell_type_niche_enrichment = pairwise_categorical_continuous_enrichment_test (dcast (
niche_self_neighbor_dt[
! is.na (get (paste0 (cell_type_to_test,' Neighbors' ))),.(proportion = .N/ 15 ), .(
cell_id, niche_type,
neighbor_cell_type = get (paste0 (cell_type_to_test,' Neighbors' ))
)
],
cell_id + niche_type ~ neighbor_cell_type,
value.var = 'proportion' , fill = 0
))
ggplot (neighbor_cell_type_niche_enrichment) +
geom_point (aes (
x = as.numeric (niche_type), y = continuous_cols, size = N_var2,
fill = delta,
color = ifelse (q_value < 0.05 & as.numeric (delta) > 0 , 'significant' ,'not significant' )
), shape = 21 , stroke = 1 ) +
theme (
text = element_text (size = 15 ), legend.position = 'right'
) + scale_fill_viridis_c () +
scale_x_continuous (breaks = as.numeric (unique (neighbor_cell_type_niche_enrichment$ niche_type))) +
scale_size_area (max_size = 10 ) +
scale_color_manual (values = c ('significant' = 'red' ,'not significant' = 'black' )) +
labs (
title = 'Enrichment of Neighbor Cell Type By Niche' ,
x = 'Niche Type' ,
y = 'Neighbor Cell Type' ,
size = '% Neighboring Cells' ,
fill = 'Delta' ,
color = 'Significance'
) + guides (
fill = guide_legend (title = 'Delta' ),
color = guide_legend (title = 'Significance' )
)
[1] "The following categories are left out because there are less than 30 observations: 18, 21, 23, 24, 25, 28"
Pulling everything together to understand the niches
Code
library (scales)
including_niches = intersect (
intersect (
unique (histology_niche_enrichment$ niche_type),
unique (self_cell_type_niche_enrichment$ niche_type)
),
unique (neighbor_cell_type_niche_enrichment$ niche_type)
)
enrichment_overview_dt = merge (
rbind (
self_cell_type_niche_enrichment[
niche_type %in% including_niches,
.(cell_type, size = N_var1var2/ sum (N_var1var2), q_value, test_statistic = odds_ratios),
.(niche_type)
][, data_type := 'Self Cell Type' ],
neighbor_cell_type_niche_enrichment[
niche_type %in% including_niches,
.(cell_type = continuous_cols, size = N_var2, q_value, test_statistic = delta),
.(niche_type)
][, data_type := 'Neighbor Cell Type' ]
),histology_niche_enrichment[
niche_type %in% including_niches
,.(enriched_histologies = paste0 (
histology[q_value < 0.05 & odds_ratios > 1 ], collapse = ', \n '
)), .(niche_type)
], by = 'niche_type'
)
ggplot (enrichment_overview_dt) + geom_point (aes (
x = data_type, y = cell_type, size = size,
# fill = q_value,
color = ifelse (
# if it's the fishers test, we use a odds ratio > 1 as positive enrichment
(q_value < 0.05 & data_type == 'Self Cell Type' & test_statistic > 1 ) |
# if it's the continuous test, we use a delta > 0 as positive enrichment
(q_value < 0.05 & data_type == 'Neighbor Cell Type' & test_statistic > 0 ),
'significant' ,'not significant'
)
), shape = 21 , stroke = 1 ) +
facet_wrap (as.numeric (niche_type) ~ enriched_histologies, nrow = 3 ) +
theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 20 ),
legend.position = 'bottom' ,
strip.text = element_text (size = 15 ),
strip.background = element_blank (),
panel.spacing = unit (1 , "lines" )
) + #scale_fill_viridis_c() +
scale_size_area (max_size = 10 ) +
scale_x_discrete (
labels = c ('Self' ,'Neighbor' ),
limits = c ('Self Cell Type' ,'Neighbor Cell Type' )
) +
scale_color_manual (values = c ('significant' = 'red' ,'not significant' = 'black' )) +
labs (
title = 'Enrichment of Cell Types By Niche' ,
x = 'Niche Type' ,
y = 'Cell Type' ,
size = 'Proportion Cell Type' ,
fill = 'q-value' ,
color = 'Significance'
) + guides (
fill = guide_legend (title = 'q-value' ),
color = guide_legend (title = 'Significance' )
)
Code
ggplot (enrichment_overview_dt) + geom_point (aes (
x = data_type, y = cell_type, size = size,
# fill = q_value,
color = ifelse (
# if it's the fishers test, we use a odds ratio > 1 as positive enrichment
(q_value < 0.05 & data_type == 'Self Cell Type' & test_statistic > 1 ) |
# if it's the continuous test, we use a delta > 0 as positive enrichment
(q_value < 0.05 & data_type == 'Neighbor Cell Type' & test_statistic > 0 ),
'significant' ,'not significant'
)
), shape = 21 , stroke = 1 ) +
facet_wrap (enriched_histologies ~ as.numeric (niche_type), nrow = 3 ) +
theme (
axis.text.x = element_text (angle = 45 , hjust = 1 ),
text = element_text (size = 20 ),
legend.position = 'bottom' ,
strip.text = element_text (size = 15 ),
strip.background = element_blank (),
panel.spacing = unit (1 , "lines" )
) + #scale_fill_viridis_c() +
scale_size_area (max_size = 10 ) +
scale_x_discrete (
labels = c ('Self' ,'Neighbor' ),
limits = c ('Self Cell Type' ,'Neighbor Cell Type' )
) +
scale_color_manual (values = c ('significant' = 'red' ,'not significant' = 'black' )) +
labs (
title = 'Enrichment of Cell Types By Niche' ,
x = 'Niche Type' ,
y = 'Cell Type' ,
size = 'Proportion Cell Type' ,
fill = 'q-value' ,
color = 'Significance'
) + guides (
fill = guide_legend (title = 'q-value' ),
color = guide_legend (title = 'Significance' )
)